{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": 3
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python_defaultSpec_1597143650061",
   "display_name": "Python 3.6.4 64-bit ('Softwares': virtualenv)"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 简单日期匹配\n",
    "text = \"星期一，星期二，星期三，星期四，星期五，星期六，星期日，星期1，星期2，星期3，星期4，星期5，星期6，星期7\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pattern = re.compile('星期[^\\d]')\n",
    "titles = re.findall(pattern, text) # 查找所有能匹配的项\n",
    "titles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "titles = re.sub(pattern, r\"星期八\",text) # 替换所有能匹配的项\n",
    "titles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 电影标题匹配\n",
    "text = '<h2 data-v-7f856186=\"\" class=\"m-b-sm\">霸王别姬 - Farewell My Concubine</h2>'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# compile 函数用于编译正则表达式，生成一个Pattern对象，供正则表达式函数使用\n",
    "pattern = re.compile('霸王.姬')\n",
    "\n",
    "# 在字符串中找到正则表达式所匹配的所有子串，并返回一个列表，如果没有找到匹配的，则返回空列表\n",
    "titles = re.findall(pattern, text)\n",
    "titles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pattern = re.compile('<h2.*>(.*?)</h2>')\n",
    "titles = re.findall(pattern, text)\n",
    "titles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}